File: /www/wwwroot/biographybirthday.com/wp-scrap/spinbot_get_realitystarfacts.php
<?php
require 'vendor/autoload.php';
use Goutte\Client;
// ---------------------------------------------------------------------------
// Scrape one profile page, rewrite its body through Spinbot and print the
// result as a JSON document with the keys: name, image, meta, body and
// available-spins.
// ---------------------------------------------------------------------------

// Page to scrape. A sample profile is used unless ?url=... is supplied.
$url = 'https://realitystarfacts.com/keshia-knight-pulliam/';
if (isset($_GET['url'])) {
    $url = $_GET['url'];
}

// The URL comes straight from the query string: refuse anything that is not
// a plain absolute http(s) URL (arrays, file://, gopher://, ...).
// NOTE(review): this does not stop requests to internal hosts; add a host
// allow-list if this endpoint is reachable by untrusted clients.
if (!is_string($url) || !preg_match('#^https?://#i', $url)) {
    http_response_code(400);
    header('Content-Type: application/json');
    echo json_encode(array('error' => 'url must be an absolute http(s) URL'));
    exit;
}

$prefix = 'bio_';   // prefix applied to every key stored in $meta
$meta = array();

$client = new Client();
$crawler = $client->request('GET', $url);

// Headline of the page. array_shift() takes its argument by reference, so it
// must not be fed a function result directly; read the first entry instead.
$titles = $crawler->filter('h1.single-title')->extract('_text');
$name = isset($titles[0]) ? $titles[0] : '';
$name = preg_replace("#<\s*\/?(strong)\s*[^>]*?>#im", '', $name);

// Open Graph image; attr() throws when the meta tag is missing.
try {
    $image = $crawler->filterXpath('//meta[@property="og:image"]')->attr('content');
} catch (Exception $e) {
    $image = '';
}

// Walk the table cells in document order. Cell #1 is read as the full name,
// cell #5 as the date of birth and cell #7 as the birth place.
// NOTE(review): those positions are assumptions about the page layout.
$count = 0;
$dob_valid = false;
$crawler->filter('table tr td')->each(function ($node) use (&$count, &$meta, &$name, &$dob_valid, $prefix) {
    if ($count == 1) {
        $fullname = $node->html();
        $names = splitName($fullname);
        // The table value replaces the headline-derived name.
        $name = $fullname;
        $meta[$prefix . 'first_name'] = $names['firstname'];
        if ($names['lastname'] !== null && $names['lastname'] !== '') {
            $meta[$prefix . 'last_name'] = $names['lastname'];
        }
    } elseif ($count == 5) {
        $dob = cleanDate($node->html());
        if ($dob === false) {
            // Counter is deliberately left untouched: every following cell is
            // re-tested as the date until one parses.
            return;
        }
        $dob_valid = true;
        $meta[$prefix . 'dob'] = $dob;
    } elseif ($dob_valid && $count == 7) {
        $birth_place = str_replace('.', '', $node->html());
        $meta[$prefix . 'fact_birth-place'] = $birth_place;
    }
    $count++;
});

// Article body: empty <script>/<style> elements, drop whole blocks we do not
// want, then unwrap the remaining inline/presentation tags.
$html = $crawler->filter('.thecontent')->html();
$html = preg_replace('/(<(script|style)\b[^>]*>).*?(<\/\2>)/is', "$1$3", $html);
$strip_tags = "center|style|span|ins|script|a|img|input|button|figure|noscript";
$remove_tags_and_content = array('table','ul','figcaption','div');
foreach ($remove_tags_and_content as $tg) {
    $html = strip_tags_and_content($html, $tg);
}
$html = preg_replace('#<a.*?>.*?</a>#i', '', $html);
$html = preg_replace("#<\s*\/?(".$strip_tags.")\s*[^>]*?>#im", '', $html);
// Nested <div>s leave orphaned closing tags behind; remove them.
$textToSpin = str_replace('</div>', '', $html);

$response = spinbot($textToSpin);

// spinbot() returns the raw response including headers. cURL may put one or
// more interim "1xx" header blocks (typically "100 Continue" for larger POST
// bodies) in front of the final one, so skip those instead of assuming that
// exactly one is always present.
$strResponseHeaders = '';
$strResponseBody = null;
if (is_string($response) && $response !== '') {
    $remaining = $response;
    do {
        $parts = explode("\r\n\r\n", $remaining, 2);
        $strResponseHeaders = $parts[0];
        $strResponseBody = isset($parts[1]) ? $parts[1] : null;
        $remaining = $strResponseBody;
    } while ($remaining !== null && preg_match('#^HTTP/\S+ 1\d\d\b#', $strResponseHeaders));
}
$aHeaders = putHeadersTextIntoArray($strResponseHeaders);
$out = $strResponseBody;

$fields = array(
    'name' => $name,
    'image' => $image,
    'meta' => $meta,
    'body' => $out,
    // null when the API did not report a quota (e.g. the request failed).
    'available-spins' => isset($aHeaders['available-spins']) ? $aHeaders['available-spins'] : null,
);
header('Content-Type: application/json');
echo json_encode($fields);
/**
 * Remove every <$tag ...>...</$tag> element, including its content.
 *
 * Matching is case-insensitive and anchored on the complete tag name, so
 * asking for "p" leaves "<pre>" alone. Each match ends at the first closing
 * tag, therefore nested elements of the same name leave the outer closing
 * tag (and any text before it) behind.
 *
 * @param string $html Markup to clean.
 * @param string $tag  Tag name without angle brackets, e.g. "div".
 * @return string The cleaned markup; the input is returned unchanged if the
 *                regex engine reports an error.
 */
function strip_tags_and_content($html,$tag){
    // Escape the name and forbid a following name character so that only the
    // exact tag matches.
    $name = preg_quote($tag, '/') . '(?![\w:-])';
    $cleaned = preg_replace('/<' . $name . '[^>]*>[\s\S]*?<\/' . $name . '[^>]*>/i', '', $html);
    return $cleaned === null ? $html : $cleaned;
}
/**
 * POST a text to the Spinbot API and return the raw HTTP response.
 *
 * The response headers are included in the returned string (CURLOPT_HEADER),
 * so the caller has to separate headers and body itself.
 *
 * @param string $textToSpin Text/markup to rewrite, sent as the request body.
 * @return string|false Raw response (headers + body), or false when the
 *                      request could not be performed.
 */
function spinbot($textToSpin){
    $url = 'https://api.spinbot.com';
    // NOTE(review): the API key is hard-coded in source; consider moving it
    // to configuration outside the web root.
    $spinbotApiKey = '87b0d1bb9b3c420381ee141837cbc7f5';
    $header = array(
        "x-auth-key:$spinbotApiKey",
        'x-spin-cap-words:true',
        'x-words-to-skip:rewrit,nonExistentWordPart',
        'x-min-percent-change-per-sentence:any',
    );

    $ch = curl_init();
    if ($ch === false) {
        error_log('spinbot: unable to initialise cURL');
        return false;
    }

    curl_setopt_array($ch, array(
        CURLOPT_URL => $url,
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $textToSpin,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_HEADER => true,
        CURLOPT_HTTPHEADER => $header,
        // Without limits a stalled API call would block the request forever.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 120,
    ));

    $response = curl_exec($ch);
    if ($response === false) {
        // Replaces the former always-on CURLOPT_VERBOSE debug output.
        error_log('spinbot: request failed: ' . curl_error($ch));
    }
    curl_close($ch);

    return $response;
}
/**
 * Normalise a human-readable date (e.g. "August 4th, 1961") to "Y-m-d".
 *
 * Markup is stripped, commas and dots are dropped and ordinal suffixes
 * (st/nd/rd/th) are removed only when they directly follow a digit, so month
 * names such as "August" stay intact.
 *
 * @param string $date Raw date text, possibly containing HTML.
 * @return string|false The date as "Y-m-d", or false when the text is empty
 *                      or cannot be parsed.
 */
function cleanDate($date){
    // Drop tags first so that "4<sup>th</sup>" becomes "4th".
    $date = trim(strip_tags((string) $date));
    $date = str_replace(array(',', '.'), '', $date);
    $date = preg_replace('/(\d)(?:st|nd|rd|th)\b/i', '$1', $date);
    $date = trim((string) $date);

    // DateTime treats an empty string as "now"; that is not a valid birthday.
    if ($date === '') {
        return false;
    }

    try{
        $dt = new DateTime($date);
    }catch (Exception $e){
        return false;
    }
    return $dt->format('Y-m-d');
}
/**
 * Turn a raw HTTP header block into an associative array.
 *
 * The first line (the status line) is stored under the key "http_code".
 * Every following "Name: value" line is stored under its name exactly as
 * received (case is not changed). Lines without a colon are ignored.
 *
 * @param string $header_text Header block with CRLF line endings.
 * @return array Header values keyed by name, plus "http_code".
 */
function putHeadersTextIntoArray($header_text) {
    $headers = array();
    foreach (explode("\r\n", $header_text) as $i => $line) {
        if ($i === 0) {
            $headers['http_code'] = $line;
            continue;
        }

        // Split on the first colon only so values that contain ": " survive.
        $colon = strpos($line, ':');
        if ($colon === false) {
            continue;
        }
        $key = trim(substr($line, 0, $colon));
        if ($key === '') {
            continue;
        }
        $headers[$key] = trim(substr($line, $colon + 1));
    }
    return $headers;
}
/**
 * Print a value wrapped in <pre> tags for quick inspection.
 *
 * @param mixed $arr  Value to print with print_r().
 * @param bool  $exit When true (the default) the script stops afterwards.
 */
function debug($arr, $exit = true){
    $dump = print_r($arr, true);
    echo '<pre>' . $dump . '</pre>';
    if (!$exit) {
        return;
    }
    exit;
}
/**
 * Split a full name into first, middle and last name.
 *
 * The name is split on any run of whitespace, so surrounding or repeated
 * spaces and line breaks do not produce empty parts. Markup is not removed.
 *
 * @param string $name Full name, e.g. "Keshia Knight Pulliam".
 * @return array Keys "firstname" (string, '' for empty input), "lastname"
 *               (string, or null when the name has fewer than two words)
 *               and "middlename" (remaining words joined by a space).
 */
function splitName($name) {
    $parts = preg_split('/\s+/', trim((string) $name), -1, PREG_SPLIT_NO_EMPTY);
    if ($parts === false) {
        $parts = array();
    }

    $first = array_shift($parts);
    $last = array_pop($parts);

    return array(
        'firstname' => $first === null ? '' : $first,
        'lastname' => $last,
        'middlename' => join(' ', $parts)
    );
}
?>